# A history of Australian Equities
# Thomas Mathews, Reserve Bank of Australia

# this replicates the charts and data series used in the 'History of Australian Equities' Research Discussion Paper

# Inputs:
#         One .csv file with the quarterly Sydney Stock Exchange data ('Sydney Stock Exchange Data.csv')
#         One .csv file with the annual Sydney Stock Exchange data ('Sydney Stock Exchange Data annual.csv')
#         One .csv file with Sydney Stock Exchange company information ('info input.csv')
#         One .csv file with the modern time series extensions ('external input.csv'): data from Refinitiv and other sources
#         One .xlsx file with the modern company-level data ('Modern company data'): data from Bloomberg
#         One .csv file with the modern company-level info ('Modern Info') (names and GICS codes) from Bloomberg

# Output: the charts aren't actually drawn in R. Instead, the data for them is output as a range of csvs in a 'graphs' subfolder

# Load packages and helper functions

library(tidyr)
library(plyr)
library(dplyr)
library(readxl)
library(tibble)
library(zoo)
library(lubridate)


# Helper functions used throughout this script; each one is prefixed `udf_`
source(file = "additional functions.R")

# Load the input data
source(file = "load data.R")


# Un-matched aggregates: for every date and broad sector, sum each numeric
# column across companies, skipping missing values
time_series <- as_tibble(
  summarise_if(
    group_by(SSE_data_ordinaries, Dates, Broad.sector),
    is.numeric,
    sum,
    na.rm = TRUE
  )
)

# Add on a 'Total' row for each date: the sum of every numeric column across
# all companies, regardless of sector. The columns are re-ordered to match
# time_series before binding. Afterwards the rows are sorted by date, the
# columns that are meaningless as sums are dropped, and zero sums are recorded
# as NA (sum(..., na.rm = TRUE) returns 0 when every input is missing).
time_series <- rbind(
  time_series,
  SSE_data_ordinaries %>%
    group_by(Dates) %>%
    summarise_if(is.numeric, sum, na.rm = TRUE) %>%
    mutate(Broad.sector = "Total") %>%
    select(names(time_series))
) %>%
  arrange(Dates) %>%
  select(-c(Price, Market.Capitalisation.Rank)) %>% # doesn't really make sense to sum these
  # na_if() is applied column by column: dplyr >= 1.1.0 requires `y` to be
  # castable to the type of `x`, so piping in the whole data frame errors
  mutate_if(is.numeric, na_if, y = 0)

# calculate matched ratios using a user-defined function
# matched means every company in the numerator has to be in the denominator for a given period (and vice versa)
# but the composition of the market can still change year-to-year


# Arguments for the aggregation function. The four vectors are parallel:
# position i across them describes one call to udf_aggregateRatio()
varname_list <- c(
  "Price_book", "Price_earnings", "Dividend_yield", "EPS",
  "DPS", "NTA_per_share", "Payout_ratio"
)
numerator_list <- c(
  "Market.Capitalisation", "Market.Capitalisation", "Dividend", "Profit",
  "Dividend", "Book", "Dividend"
)
denominator_list <- c(
  "Book", "Profit", "Market.Capitalisation", "Shares",
  "Shares", "Shares", "Profit"
)
scale_factors <- c(1, 1, 100, 1, 1, 1, 100)

# Compute each ratio, join the results together on date and sector, then
# attach them to the un-matched aggregates
time_series <- as_tibble(
  merge(
    time_series,
    Reduce(
      function(so_far, next_ratio) {
        merge(so_far, next_ratio, all = TRUE, by = c("Dates", "Broad.sector"))
      },
      lapply(
        seq_along(numerator_list),
        function(i) {
          udf_aggregateRatio(
            SSE_data_ordinaries,
            numerator = numerator_list[i],
            denominator = denominator_list[i],
            varname = varname_list[i],
            scale = scale_factors[i]
          )
        }
      )
    ),
    all = TRUE
  )
)

# The specification vectors are no longer needed
rm(list = c("varname_list", "numerator_list", "denominator_list", "scale_factors"))

# Dividend yield where a company with a missing dividend is assumed to have
# paid zero, provided it has a price for that period
# NOTE(review): this starts from SSE_data, whereas the other aggregates use
# SSE_data_ordinaries - confirm that is intended
time_series <- as_tibble(
  merge(
    time_series,
    udf_aggregateRatio(
      mutate(
        SSE_data,
        Dividend = ifelse(is.na(Dividend) & !is.na(Price), 0, Dividend)
      ),
      numerator = "Dividend",
      denominator = "Market.Capitalisation",
      varname = "Dividend_yield_withzeroes",
      scale = 100
    ),
    all = TRUE
  )
)

# Price and accumulation indices for the total market and for each sector

sectors <- c("Total", "Resources", "Financial", "Other")

time_series <- time_series %>%
  merge(
    Reduce(
      rbind,
      lapply(sectors, function(sector) udf_price_index(SSE_data_ordinaries, sector))
    ),
    all = TRUE
  ) %>%
  as_tibble() %>%
  group_by(Broad.sector) %>%
  arrange(Dates) %>%
  mutate(
    Price_index_gr = Price_index / lag(Price_index),
    # the dividend yield is annualised but the data are quarterly, hence the
    # 1/4 power
    Accumulation_index_gr = Price_index_gr + (1 + Dividend_yield / 100)^(1 / 4) - 1,
    # where growth is missing, the index is carried forward unchanged
    Accumulation_index_gr = replace(Accumulation_index_gr, is.na(Accumulation_index_gr), 1),
    Accumulation_index = cumprod(Accumulation_index_gr)
  ) %>%
  select(-c(Price_index_gr, Accumulation_index_gr))

rm(list = "sectors")

# Extend the price and total return indices with the modern series, in a
# separate data frame. Used for calculating average returns on equities and
# for some of the graphs.
#
# Every series is first converted to period-on-period growth; growth from the
# modern series is used for dates after the last date in time_series, and the
# spliced growth rates are then compounded back into indices.

total_returns_extended <- time_series %>%
  select(Dates, Broad.sector, Price_index) %>%
  merge(
    external_series_quarterly %>%
      select(
        Dates,
        Total = DS_ASX_index,
        Resources = DS_RES_index,
        Financial = DS_FIN_index,
        Other = DS_OTHER_index
      ) %>%
      gather(key = Broad.sector, value = Price_index_modern, -Dates),
    all = TRUE
  ) %>%
  merge(
    time_series %>%
      select(Dates, Broad.sector, Accumulation_index) %>%
      merge(
        external_series_quarterly %>%
          select(
            Dates,
            Total = DS_ASX_TR,
            Resources = DS_RES_TR,
            Financial = DS_FIN_TR,
            Other = DS_OTHER_TR
          ) %>%
          gather(key = Broad.sector, value = Accumulation_index_modern, -Dates),
        all = TRUE
      )
  ) %>%
  group_by(Broad.sector) %>%
  mutate_if(is.numeric, function(level) {
    # index level -> growth factor; missing growth is treated as no change
    growth <- level / lag(level)
    replace(growth, is.na(growth), 1)
  }) %>%
  mutate(
    # helper flag for the splice; dropped by the final select()
    after_history = Dates > max(time_series$Dates),
    Price_index = ifelse(after_history, Price_index_modern, Price_index),
    Accumulation_index = ifelse(after_history, Accumulation_index_modern, Accumulation_index)
  ) %>%
  # growth factors -> index levels
  mutate_if(is.numeric, cumprod) %>%
  select(Dates, Broad.sector, Price_index, Accumulation_index)


# Output the data for the graphs
source(file = "graphs.R")

# Raw data for the Data appendix
source(file = "data Appendix.R")
